library(AnNet)
#> Loading required package: igraph
#>
#> Attaching package: 'igraph'
#> The following objects are masked from 'package:stats':
#>
#> decompose, spectrum
#> The following object is masked from 'package:base':
#>
#> union
#> Loading required package: poweRlaw
#> Loading required package: org.Hs.eg.db
#> Loading required package: AnnotationDbi
#> Loading required package: stats4
#> Loading required package: BiocGenerics
#>
#> Attaching package: 'BiocGenerics'
#> The following objects are masked from 'package:igraph':
#>
#> normalize, path, union
#> The following objects are masked from 'package:stats':
#>
#> IQR, mad, sd, var, xtabs
#> The following objects are masked from 'package:base':
#>
#> anyDuplicated, append, as.data.frame, basename, cbind, colnames,
#> dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
#> grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
#> order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
#> rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
#> union, unique, unsplit, which.max, which.min
#> Loading required package: Biobase
#> Welcome to Bioconductor
#>
#> Vignettes contain introductory material; view with
#> 'browseVignettes()'. To cite Bioconductor, see
#> 'citation("Biobase")', and for packages 'citation("pkgname")'.
#> Loading required package: IRanges
#> Loading required package: S4Vectors
#> Warning: package 'S4Vectors' was built under R version 4.1.3
#>
#> Attaching package: 'S4Vectors'
#> The following objects are masked from 'package:base':
#>
#> expand.grid, I, unname
#>
#> Loading required package: latex2exp
#> Loading required package: RSpectra
#>
#> snapshotDate(): 2022-02-22
#> loading from cache
#>
#> Attaching package: 'AnNet'
#> The following object is masked from 'package:igraph':
#>
#> permute
library(synaptome.db)
#> Loading required package: synaptome.data
#> Loading required package: AnnotationHub
#> Loading required package: BiocFileCache
#> Loading required package: dbplyr
#>
#> Attaching package: 'AnnotationHub'
#> The following object is masked from 'package:Biobase':
#>
#> cache
library(ggplot2)
library(synaptome.db)
library(pander)
library(ggrepel)The package allows building the network from the data frame, where the rows correspond to the edges of the graph, or for the list of nodes (genes), for which the information will be retrieved from the SynaptomeDB.
The command builds the graph from provided data frame, simplifies the graph (removing multiple edges and loops) and finds the MCC (maximum connected component)
file <- system.file("extdata", "PPI_Presynaptic.csv", package = "AnNet")
tbl <- read.csv(file, sep="\t")
head(tbl)
#> entA entB We
#> 1 273 1759 1
#> 2 273 1785 1
#> 3 273 26052 1
#> 4 273 824 1
#> 5 273 161 1
#> 6 273 1020 1
gg <- buildNetwork(tbl)
summary(gg)
#> IGRAPH 906f9b2 UN-- 1780 6620 --
#> + attr: name (v/c)
cid<-match('Presynaptic',getCompartments()$Name) # Let's get the ID for presynaptic compartment
cid
#> [1] 2
t<-getAllGenes4Compartment(cid) # Now we need to collect all the gene IDs for presinaptic compartment
dim(t)
#> [1] 2304 8
head(t)
#> # A tibble: 6 × 8
#> GeneID MGI HumanEntrez MouseEntrez RatEntrez HumanName MouseName RatName
#> <int> <chr> <int> <int> <int> <chr> <chr> <chr>
#> 1 1 MGI:1277… 1742 13385 29495 DLG4 Dlg4 Dlg4
#> 2 2 MGI:88256 815 12322 25400 CAMK2A Camk2a Camk2a
#> 3 3 MGI:96568 9118 226180 24503 INA Ina Ina
#> 4 4 MGI:98388 6711 20742 305614 SPTBN1 Sptbn1 Sptbn1
#> 5 5 MGI:88257 816 12323 24245 CAMK2B Camk2b Camk2b
#> 6 6 MGI:1344… 1740 23859 64053 DLG2 Dlg2 Dlg2
gg<-buildFromSynaptomeByEntrez(t$HumanEntrez) # finally, build the graph using respecctive gene EntrezIDs as node IDs
summary(gg)
#> IGRAPH 8ddb28e UN-- 1780 6620 --
#> + attr: name (v/c)
file <- system.file("extdata", "PPI_Presynaptic.gml", package = "AnNet")
gg1 <- igraph::read.graph(file,format="gml")
summary(gg1)
#> IGRAPH d04bca5 UN-- 2169 8673 --
#> + attr: id (v/n), name (v/c), GeneName (v/c), UniProt (v/c), louvain
#> | (v/n), TopOntoOVG (v/c), TopOntoOVGHDOID (v/c)Adding gene names to nodes.
gg<-annotateGeneNames(gg)
#> 'select()' returned 1:1 mapping between keys and columns
summary(gg)
#> IGRAPH 8ddb28e UN-- 1780 6620 --
#> + attr: name (v/c), GeneName (v/c)
head(V(gg))
#> + 6/1780 vertices, named, from 8ddb28e:
#> [1] 273 6455 1759 1785 26052 2923
head(V(gg)$GeneName)
#> [1] "AMPH" "SH3GL1" "DNM1" "DNM2" "DNM3" "PDIA3"Adding diseases associations from HDO database
afile<-system.file("extdata", "flatfile_human_gene2HDO.csv", package = "AnNet")
dis <- read.table(afile,sep="\t",skip=1,header=F,strip.white=T,quote="")
gg <- annotate_topOnto_ovg(gg, dis)
summary(gg)
#> IGRAPH 8ddb28e UN-- 1780 6620 --
#> + attr: name (v/c), GeneName (v/c), TopOnto_OVG (v/c),
#> | TopOnto_OVG_HDO_ID (v/c)##Shanno Adding synaptic function annotated from еру Shanno et al.,paper
sfile<-system.file("extdata", "SCH_flatfile.csv", package = "AnNet")
shan <- read.table(sfile,sep="\t",skip=1,header=F,strip.white=T,quote="")
sgg<-annotate_SCHanno(gg,shan)
summary(sgg)
#> IGRAPH 8ddb28e UN-- 1780 6620 --
#> + attr: name (v/c), GeneName (v/c), TopOnto_OVG (v/c),
#> | TopOnto_OVG_HDO_ID (v/c), Schanno (v/c)This is manually curated presynaptic genes specific functional annotation based on the paper of Boyken at al., 2019
{r annotate_Chua, eval=FALSE}
sfile<-system.file("extdata", "PresynAn.csv", package = "AnNet")
pres <- read.table(sfile,sep="\t",skip=1,header=F,strip.white=T,quote="")
sgg <- annotate_CHUA(gg, pres)
summary(sgg)
Adding functionality from GO: BP, MF,CC
sfile<-system.file("extdata", "flatfile.go.BP.csv", package = "AnNet")
goBP <- read.table(sfile,sep="\t",skip=1,header=F,strip.white=T,quote="")
sgg <- annotate_go_bp(gg, goBP)
summary(sgg)
#> IGRAPH 8ddb28e UN-- 1780 6620 --
#> + attr: name (v/c), GeneName (v/c), TopOnto_OVG (v/c),
#> | TopOnto_OVG_HDO_ID (v/c), GO_BP_ID (v/c), GO_BP (v/c)
sfile<-system.file("extdata", "flatfile.go.MF.csv", package = "AnNet")
goMF <- read.table(sfile,sep="\t",skip=1,header=F,strip.white=T,quote="")
sgg <- annotate_go_mf(gg, goMF)
summary(sgg)
#> IGRAPH 8ddb28e UN-- 1780 6620 --
#> + attr: name (v/c), GeneName (v/c), TopOnto_OVG (v/c),
#> | TopOnto_OVG_HDO_ID (v/c), GO_MF_ID (v/c), GO_MF (v/c)
sfile<-system.file("extdata", "flatfile.go.CC.csv", package = "AnNet")
goCC <- read.table(sfile,sep="\t",skip=1,header=F,strip.white=T,quote="")
sgg <- annotate_go_cc(gg, goCC)
summary(sgg)
#> IGRAPH 8ddb28e UN-- 1780 6620 --
#> + attr: name (v/c), GeneName (v/c), TopOnto_OVG (v/c),
#> | TopOnto_OVG_HDO_ID (v/c), GO_CC_ID (v/c), GO_CC (v/c)#Esimate node centrality measures ## Estimate centrality measures with values added as node attributes. Centrality measures are as following:DEG - degree, BET - betweenness, CC - clustering coefficient, SL - semilocal centrality, mnSP - mean shortest path, PR - page rank, sdSP - standard deviation of the shortest path
gg <- calcCentrality(gg)
summary(gg)
#> IGRAPH 8ddb28e UN-- 1780 6620 --
#> + attr: name (v/c), GeneName (v/c), TopOnto_OVG (v/c),
#> | TopOnto_OVG_HDO_ID (v/c), DEG (v/c), BET (v/c), CC (v/c), SL (v/c),
#> | mnSP (v/c), PR (v/c), sdSP (v/c)##Get node centralities as a matrix. The same centrality measures as before are presented in a matrix form.
mc <- getCentralityMatrix(gg)
head(mc)
#> ID DEG BET CC SL mnSP PR sdSP
#> [1,] "273" "12" "833.233" "0.106" "44656" "3.424" "0.000786" "0.707"
#> [2,] "6455" "22" "5444.71" "0.082" "170211" "2.955" "0.001337" "0.683"
#> [3,] "1759" "16" "1744.932" "0.217" "150771" "3.062" "0.000962" "0.687"
#> [4,] "1785" "27" "10994.509" "0.071" "218717" "2.89" "0.001644" "0.701"
#> [5,] "26052" "6" "291.138" "0.133" "55060" "3.503" "0.000405" "0.743"
#> [6,] "2923" "7" "1225.709" "0.095" "55104" "3.391" "0.000531" "0.758"##Get the centrality measures for random graph Sometimes one needs to compare the graph properties of the given network with the ones of the randomized graph. Command below provides three ways of randomization, including G(n,p) Erdos-Renyi model , Barabasi-Albert model and new random graph from a given graph by randomly adding/removing edges.
{r}
ggrm <- getRandomGraphCentrality(sgg, type = c("cgnp"))
head(ggrm)
To examine the networks for underlying structure (i.e. not random), one can test network’s degree distribution for evidence of scale-free structure and compare it against the randomised network model. For this we used the R “PoweRlaw” package (version 0.50.0) (Gillespie, 2015). In the example below we found evidence for disassortative mixing (Newman, 2002), i.e. a preference for high-degree genes to attach to low-degree gene(presynaptic: -0.16).
pFit <- FitDegree( as.vector(igraph::degree(graph=sgg)),plot=TRUE,WIDTH=2480, HEIGHT=2480)
## Get entropy rate Evidence for scale-free structure can also be tested
by performing a perturbation analysis on each network (Teschendorff et
al, 2014). In this analysis each protein was perturbed through
over-expression (red) and under-expression (green), with the global
entropy rate (SR) after each proteins pertubation being plotted against
the log of the proteins degree, as shown at the plot below. In the
example network we observed a bi-modal response, between gene
over-expression and degree, and opposing bi-phasic response relative to
over/under-expression between global entropy rate and degree. This type
of bi-modal, bi-phasic behaviour has been observed only in networks with
scale-free or approximate scale-free topology (Teschendorff et al,
2014)[80].
ent <- getEntropyRate(gg)
ent
#> $maxSr
#> [1] 3.822764
#>
#> $SRo
#> [1] 2.639026
SRprime <- getEntropy(gg, maxSr = NULL)
head(SRprime)
#> ENTREZ.ID GENE.NAME DEGREE UP DOWN
#> [1,] "273" "AMPH" "12" "0.6877989113741" "0.690315321671207"
#> [2,] "6455" "SH3GL1" "22" "0.688466483598983" "0.689906842691128"
#> [3,] "1759" "DNM1" "16" "0.689220589967989" "0.689957716069586"
#> [4,] "1785" "DNM2" "27" "0.689632772969398" "0.689577514448685"
#> [5,] "26052" "DNM3" "6" "0.689143168786511" "0.690296867048524"
#> [6,] "2923" "PDIA3" "7" "0.688882651999579" "0.690313211229183"
plotEntropy(SRprime, subTIT = "Entropy", SRo = ent$SRo, maxSr = ent$maxSr)
#> Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
#> "none")` instead.
## Get modularity. Normalised modularity. Normalised modularity allows
comparison the networks with different structure. Qm based on the
previous studies by Parter et al., 2007, Takemoto, 2012,Takemoto, 2013,
Takemoto and Borjigin, 2011, which was defined as: Qm =
(Qreal-Qrand)/(Qmax-Qrand) Where Qreal is the network modularity of a
real-world signaling network and,Qrand is the average network modularity
value obtained from 10,000 randomized networks constructed from its
real-world network. Qmax was estimated as: 1 − 1/M, where M is the
number of modules in the real network. Randomized networks were
generated from a real-world network using the edge-rewiring algorithm
(Maslov and Sneppen, 2002).
#' cid<-match('Presynaptic',getCompartments()$Name)
#' t<-getAllGenes4Compartment(cid)
#' gg<-buildFromSynaptomeByEntrez(t$HumanEntrez)
nm<-normModularity(gg,alg='louvain')
nm
#> [1] 0.02605522Clustering, or community detection, in networks has been well studied in the field of statistical physics with particular attention to methods developed for social science networks. The underlying assumption(s) of what makes a community in social science, translates remarkably well to what we think of as a community (sub-complex, module or cluster) in PPI networks The possible algorithms of choice implemented in the package are: “lec”(‘Leading-Eigenvector, Newman, 2006), “wt”(Walktrap, Pons & Latapy, 2006), “fc”(Fast-Greedy Community’ algorithm, Clauset et al., 2004), “infomap” (InfoMAP, Rosvall et al., 2007; Rosvall et al., 2010), “louvain” (Louvain, Blondel et al., 2008), “sgG1”, “sgG2”, “sgG5”(SpinGlass, Reichardt & Bornholdt). For each algorithm of interest the membership could be obtained with calcMembership command. All algorithm implementations, apart from Spectral were performed using the publicly available R package igraph (Csardi & Nepusz, 2006) (R version 3.4.2, igraph version 1.1.2). Parameters used in the fc, lec, sg, wt and lourvain algorithms were chosen as to maximise the measure Modularity (Newman & Girvan, 2004); infomap seeks the optimal community structure in the data by maximising the objective function called the Minimum Description Length (Rissanen, 1978; Grwald et al., 2005)
calcAllClustering allows to calculate memberships for all clustering algorithms simultaneously (may take time), and store them on the graph vertices. In this case, calcMembership command will return the community membership for specific algorithm.
Alternatively, calcClustering allows to calculate clustering for selected algorithm.
#ggc <- calcAllClustering(gg)
#summary(ggc)
#mem <- calcMembership(ggc, alg = c("lec"))# choose one algorithm from the list
#head(mem)
alg = "louvain"
gg <- calcClustering(gg, alg)
gg
#> IGRAPH 8ddb28e UN-- 1780 6620 --
#> + attr: name (v/c), GeneName (v/c), TopOnto_OVG (v/c),
#> | TopOnto_OVG_HDO_ID (v/c), DEG (v/c), BET (v/c), CC (v/c), SL (v/c),
#> | mnSP (v/c), PR (v/c), sdSP (v/c), louvain (v/c)
#> + edges from 8ddb28e (vertex names):
#> [1] 273 --1759 273 --1785 273 --26052 273 --824 273 --161
#> [6] 273 --1020 273 --5530 273 --274 273 --5532 273 --25977
#> [11] 273 --8867 273 --9901 6455--1759 6455--1785 6455--26052
#> [16] 6455--6456 6455--7534 6455--10015 6455--6457 6455--7431
#> [21] 6455--392 6455--23176 6455--5689 6455--10570 6455--10059
#> [26] 6455--6812 6455--3778 6455--57030 6455--114569 6455--192683
#> + ... omitted several edgesFor assessing the robustness of the clustering randomization study could be performed, which applies the same clustering algorithm to N perturbed networks and returns the consensus matrix where each pair of nodes will be assigned the probability to belong to the same cluster.
conmat <- makeConsensusMatrix(gg, N=100,alg = alg, type = 2, mask = 10,reclust = FALSE, Cnmin = -1, Cnmax = 10)#Build consensus matrix for louvain clustering##Consensus matrix value distribution Consensus matrix value distribution could be visualised in the following way:
steps <- 100
Fn <- ecdf(conmat[lower.tri(conmat)])
X<-seq(0,1,length.out=steps+1)
cdf<-Fn(X)
dt<-data.frame(cons=X,cdf=cdf)
ggplot(dt,aes(x=cons,y=cdf))+geom_line()+
theme(
axis.title.x=element_text(face="bold",size=rel(2.5)),
axis.title.y=element_text(face="bold",size=rel(2.5)),
legend.title=element_text(face="bold",size=rel(1.5)),
legend.text=element_text(face="bold",size=rel(1.5)),
legend.key=element_blank())+
theme(panel.grid.major = element_line(colour="grey40",size=0.2),
panel.grid.minor = element_line(colour="grey40",size=0.1),
panel.background = element_rect(fill="white"),
panel.border = element_rect(linetype="solid",fill=NA))
## Cluster robustness Cluster robustness assess the robustness of the
obtained clusters and could help to evaluate the “goodness” of chosen
clustering algorithm.
clrob<-getRobustness(gg, alg = alg, conmat)
pander(clrob)| alg | C | Cn | Crob | CrobScaled |
|---|---|---|---|---|
| louvain | 1 | 163 | 0.09295 | 0.2793 |
| louvain | 2 | 207 | 0.08939 | 0.1456 |
| louvain | 3 | 224 | 0.08654 | 0.03879 |
| louvain | 4 | 194 | 0.08864 | 0.1175 |
| louvain | 5 | 65 | 0.09194 | 0.2415 |
| louvain | 6 | 151 | 0.09128 | 0.2164 |
| louvain | 7 | 117 | 0.08858 | 0.1151 |
| louvain | 8 | 99 | 0.09016 | 0.1746 |
| louvain | 9 | 89 | 0.0867 | 0.04472 |
| louvain | 10 | 124 | 0.08585 | 0.01266 |
| louvain | 11 | 116 | 0.08706 | 0.05805 |
| louvain | 12 | 47 | 0.0864 | 0.03345 |
| louvain | 13 | 58 | 0.09294 | 0.279 |
| louvain | 14 | 87 | 0.08551 | 0 |
| louvain | 15 | 39 | 0.1122 | 1 |
Bridging proteins are known to interact with many neighbours simultaneously, organise function inside communities they belong to, but also affect/influence other communities in the network (Nepusz et al., 2008).Bridgeness is estimated from the consensus clustering estimated above and vertex degree to calculate the vertex’s community membership, i.e. the probability of the specific node to belong to every communities obtained by given clustering algorithm. The Bridgeness measure lies between 0, implying a vertex clearly belongs in a single community, and 1, implying a vertex forms a ‘global bridge’ across every community with the same strength.
br<-getBridgeness(gg,alg = alg,conmat)
#> calculating Bridgeness for: louvain
pander(head(br))| ENTREZ.ID | GENE.NAME | BRIDGENESS.louvain |
|---|---|---|
| 273 | AMPH | 0.0938476465095556 |
| 6455 | SH3GL1 | 0.478160277229497 |
| 1759 | DNM1 | 0.0184896145823757 |
| 1785 | DNM2 | 0.427626561008655 |
| 26052 | DNM3 | 0.150920248660597 |
| 2923 | PDIA3 | 0.581464396485594 |
Semi-local centrality measure (Chen et al., 2011) also lies between 0 and 1 indicating whether protein is important globally or locally. By plotting Bridgeness against semi-local centrality we can categorises the influence each protein found in our network has on the overall network structure: Region 1, proteins having a ‘global’ rather than ‘local’ influence in the network (also been called bottle-neck bridges, connector or kinless hubs (0<Sl<0.5; 0.5<Br<1). Region 2, proteins having ‘global’ and ‘local’ influence (0.5<Sl<1, 0.5<Br<1). Region 3, proteins centred within the community they belong to, but also communicating with a few other specific communities (0<Sl<0.5; 0.1<Br<0.5). Region 4, proteins with ‘local’ impact , primarily within one or two communities (local or party hubs, 0.5<Sl<1, 0<Br<0.5).
VIPs=c('8495','22999','8927','8573','26059','8497','27445','8499')
# VIPs=c('81876','10890','51552','5874','5862','11021','54734','5865','5864',
# '9522','192683','10067','10396','9296','527','9114','537','535',
# '528','51382','534','51606','523','80331','114569','127262','57084',
# '57030','388662','6853','6854','8224','9900','9899','9145','9143',
# '6855','132204','6857','127833','6861','529','526','140679','7781',
# '81615','6844','6843')
indx <- match(V(gg)$name,VIPs)
group <- ifelse( is.na(indx), 0,1)
MainDivSize <- 0.8
xmin <- 0
xmax <- 1
ymin <- 0
ymax <- 1
Xlab <- "Semilocal Centrality (SL)"
Ylab <- "Bridgeness (B)"
X <- as.numeric(igraph::get.vertex.attribute(gg,"SL",V(gg)))
X <- scale(X)
Y <- as.numeric(as.vector(br[,dim(br)[2]]))
lbls <- ifelse(!is.na(indx),V(gg)$GeneName,"")
dt<-data.frame(X=X,Y=Y,vips=group,entres=V(gg)$name,name=V(gg)$GeneName)
dt_vips<-dt[dt$vips==1,]
dt_res<-dt[dt$vips==0,]
##--- baseColor of genes
baseColor="royalblue2"
##--- SPcolor, colour highlighting any 'specical' genes
SPColor="royalblue2"
##--- PSDColor, colour of core PSD95 interactor genes
PSDColor="magenta"
ggplot(dt,aes(x=X,y=Y,label=name))+#geom_point()+
geom_point(data=dt_vips,
aes(x=X,y=Y),colour=baseColor,size = 7,shape=15,show.legend=F)+
geom_point(data=dt_res,
aes(x=X,y=Y, alpha=(X*Y)), size = 3,shape=16,show.legend=F)+
geom_label_repel(aes(label=as.vector(lbls)),
fontface='bold',color='black',fill='white',box.padding=0.1,
point.padding=NA,label.padding=0.15,segment.color='black',
force=1,size=rel(3.8),show.legend=F,max.overlaps=200)+
labs(x=Xlab,y=Ylab,title=sprintf("%s",alg))+
scale_x_continuous(expand = c(0, 0), limits = c(xmin, xmax)) +
scale_y_continuous(expand = c(0, 0), limits = c(ymin, ymax))+
theme(
axis.title.x=element_text(face="bold",size=rel(2.5)),
axis.title.y=element_text(face="bold",size=rel(2.5)),
legend.title=element_text(face="bold",size=rel(1.5)),
legend.text=element_text(face="bold",size=rel(1.5)),
legend.key=element_blank())+
theme(panel.grid.major = element_line(colour="grey40",size=0.2),
panel.grid.minor = element_line(colour="grey40",size=0.1),
panel.background = element_rect(fill="white"),
panel.border = element_rect(linetype="solid",fill=NA))+
geom_vline(xintercept=0.5,colour="grey40",size=MainDivSize,linetype=2,show.legend=F)+
geom_hline(yintercept=0.5,colour="grey40",size=MainDivSize,linetype=2,show.legend=F)
#> Warning: Removed 6 rows containing missing values (geom_point).
#> Warning: Removed 1384 rows containing missing values (geom_point).
#> Warning: Removed 1390 rows containing missing values (geom_label_repel).
##Interactive view of bridgeness plot
library(plotly)
#>
#> Attaching package: 'plotly'
#> The following object is masked from 'package:ggplot2':
#>
#> last_plot
#> The following object is masked from 'package:latex2exp':
#>
#> TeX
#> The following object is masked from 'package:AnnotationDbi':
#>
#> select
#> The following object is masked from 'package:IRanges':
#>
#> slice
#> The following object is masked from 'package:S4Vectors':
#>
#> rename
#> The following object is masked from 'package:igraph':
#>
#> groups
#> The following object is masked from 'package:stats':
#>
#> filter
#> The following object is masked from 'package:graphics':
#>
#> layout
p<-ggplot(dt,aes(x=X,y=Y,label=name))+#geom_point()+
geom_point(data=dt_vips,
aes(x=X,y=Y),colour=baseColor,shape=15,show.legend=F)+
geom_point(data=dt_res,
aes(x=X,y=Y, alpha=(X*Y)),shape=16,show.legend=F)+
labs(x=Xlab,y=Ylab,title=sprintf("%s",alg))+
scale_x_continuous(expand = c(0, 0), limits = c(xmin, xmax)) +
scale_y_continuous(expand = c(0, 0), limits = c(ymin, ymax))+
theme(
axis.title.x=element_text(face="bold",size=rel(2.5)),
axis.title.y=element_text(face="bold",size=rel(2.5)),
legend.title=element_text(face="bold",size=rel(1.5)),
legend.text=element_text(face="bold",size=rel(1.5)),
legend.key=element_blank())+
theme(panel.grid.major = element_line(colour="grey40",size=0.2),
panel.grid.minor = element_line(colour="grey40",size=0.1),
panel.background = element_rect(fill="white"),
panel.border = element_rect(linetype="solid",fill=NA))+
geom_vline(xintercept=0.5,colour="grey40",size=MainDivSize,linetype=2,show.legend=F)+
geom_hline(yintercept=0.5,colour="grey40",size=MainDivSize,linetype=2,show.legend=F)
ggplotly(p)
#> Warning: `gather_()` was deprecated in tidyr 1.2.0.
#> Please use `gather()` instead.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.Given that Disease associated genes are connected within the graph, the common question is to check whether the networks for two different diseases are overlapping, which may indicate the common molecular mechanisms. Same is valid for any pair of annotations, e.g. one would ask if two different biological functions are related. To address the question, we have utilised the algorithm from Menche et al, which estimates the minimal shortest paths between two distinct annotations and compare it to the randomly annotated graph. Below example shows the estimation of disease separation for the following diseases: DOID:10652 (Alzheimer’s_disease), (bipolar_disorder), DOID:12849 (autistic_disorder), DOID:1826 (epilepsy) Command calcDiseasePairs quickly estimates the two annotation separation on the graph and compares it with one randomly reannotated graph. This could be used for initial guess of the relationships between the annotations. To assess the significance of the obtained separation values the command runPermDisease should be used, where the user can dfine the number of permutations. The command execution will take a while, depending on this number and return the table with p-value, p.adjusted, q-value and Bonferroni test.
p <- calcDiseasePairs(
gg,
name = "TopOnto_OVG_HDO_ID",
diseases = c("DOID:10652","DOID:3312","DOID:12849"),
permute = "r"
)
pander(p$disease_separation)| DOID:10652 | DOID:3312 | DOID:12849 | |
|---|---|---|---|
| DOID:10652 | 0 | -0.1262 | 0.009981 |
| DOID:3312 | NA | 0 | -0.1249 |
| DOID:12849 | NA | NA | 0 |
r <- runPermDisease(
gg,
name = "TopOnto_OVG_HDO_ID",
diseases = c("DOID:10652","DOID:3312","DOID:12849","DOID:1826"),
Nperm = 100,
alpha = c(0.05, 0.01, 0.001)
)
pander(r$Disease_overlap_sig)| HDO.ID | N | HDO.ID | N | sAB | Separated | Overlap |
|---|---|---|---|---|---|---|
| DOID:10652 | 259 | DOID:10652 | 259 | 0 | . | . |
| DOID:10652 | 259 | DOID:3312 | 127 | -0.249547559530832 | . | YES |
| DOID:10652 | 259 | DOID:12849 | 95 | -0.102817396216147 | . | YES |
| DOID:10652 | 259 | DOID:1826 | 138 | -0.284488456101642 | . | YES |
| DOID:3312 | 127 | DOID:3312 | 127 | 0 | . | . |
| DOID:3312 | 127 | DOID:12849 | 95 | -0.366460202432022 | . | YES |
| DOID:3312 | 127 | DOID:1826 | 138 | -0.257697996938242 | . | YES |
| DOID:12849 | 95 | DOID:12849 | 95 | 0 | . | . |
| DOID:12849 | 95 | DOID:1826 | 138 | -0.318736802820636 | . | YES |
| DOID:1826 | 138 | DOID:1826 | 138 | 0 | . | . |
| zScore | pvalue | Separation/Overlap.than.chance |
|---|---|---|
| 0 | 1 | larger |
| -2.00776703602753 | 0.0446680532767752 | Smaller |
| -0.865094412556766 | 0.386986971211123 | Smaller |
| -3.14633064385253 | 0.00165332980625333 | Smaller |
| 0 | 1 | larger |
| -3.7357241762782 | 0.000187175745854292 | Smaller |
| -2.13281673628389 | 0.0329397627784495 | Smaller |
| 0 | 1 | larger |
| -3.51054019849682 | 0.000447197187033641 | Smaller |
| 0 | 1 | larger |
| Bonferroni | p.adjusted | q-value |
|---|---|---|
| . | 1 | 1 |
| . | 0.261662620028474 | 0.0893361065535504 |
| . | 1 | 0.644978285351872 |
| * | 0.0161418350528516 | 0.00551109935417777 |
| . | 1 | 1 |
| ** | 0.00548231817520051 | 0.00187175745854292 |
| . | 0.241198798678309 | 0.0823494069461237 |
| . | 1 | 1 |
| ** | 0.00654913182042719 | 0.0022359859351682 |
| . | 1 | 1 |